Data Analysis with Python in Jupyter Notebook using Plotly¶
In [1]:
# Libraries needed to run the code
import pandas as pd
import sqlalchemy as sa
import plotly.express as px
import plotly.graph_objects as go
from sqlalchemy.engine import create_engine
In [2]:
import pandas
dataframe = pandas.read_csv('/Users/diegogabrielvasquezgotopo/Desktop/Data Analyst Portfolio/SQL Database/Customer and Products/Orders.csv')
dataframe
Out[2]:
| OrderID | CreationDate | TotalDue | Status | CustomerID | SalespersonID | |
|---|---|---|---|---|---|---|
| 0 | 1000 | 5/14/16 0:00 | 118.22 | paid | 413 | 130 |
| 1 | 1001 | 7/31/16 0:00 | 211.22 | returned | 128 | 102 |
| 2 | 1002 | 7/9/16 0:00 | 139.81 | past due | 791 | 115 |
| 3 | 1003 | 4/4/16 0:00 | 323.71 | paid | 974 | 139 |
| 4 | 1004 | 2/16/16 0:00 | 214.02 | paid | 866 | 102 |
| ... | ... | ... | ... | ... | ... | ... |
| 195 | 1195 | 5/18/16 0:00 | 30.43 | paid | 107 | 116 |
| 196 | 1196 | 5/19/16 0:00 | 143.06 | paid | 862 | 115 |
| 197 | 1197 | 2/29/16 0:00 | 233.08 | paid | 712 | 134 |
| 198 | 1198 | 12/25/15 0:00 | 206.12 | cancelled | 968 | 130 |
| 199 | 1199 | 5/5/16 0:00 | 55.35 | paid | 672 | 147 |
200 rows × 6 columns
In [3]:
pip install psycopg2
Requirement already satisfied: psycopg2 in /opt/anaconda3/lib/python3.13/site-packages (2.9.11) Note: you may need to restart the kernel to use updated packages.
SQL CustomerandProducts Database Connection¶
In [3]:
import pandas as pd
from sqlalchemy import create_engine
username = 'postgres' # or your username
password = 'Pnlgsveb6' # your PostgreSQL password
host = 'localhost' # or the server IP
port = '5432' # default PostgreSQL port
database = 'CustomerandProducts'
engine = create_engine("postgresql+psycopg2://postgres:Pnlgsveb6@127.0.0.1:5432/CustomerandProducts")
In [4]:
import pandas as pd
pd.read_sql("SELECT table_name FROM information_schema.tables WHERE table_schema='public';", engine)
Out[4]:
| table_name | |
|---|---|
| 0 | customers_csv |
| 1 | v_orders |
| 2 | orders |
| 3 | product |
| 4 | salesperson |
| 5 | orderitem |
| 6 | customers |
In [5]:
df_orders = pd.read_sql("SELECT * FROM orders LIMIT 10;", engine)
df_orders
Out[5]:
| orderid | creationdate | totaldue | status | customerid | salespersonid | |
|---|---|---|---|---|---|---|
| 0 | 1000 | 2016-05-14 | 118.22 | paid | 413 | 130 |
| 1 | 1001 | 2016-07-31 | 211.22 | returned | 128 | 102 |
| 2 | 1002 | 2016-07-09 | 139.81 | past due | 791 | 115 |
| 3 | 1003 | 2016-04-04 | 323.71 | paid | 974 | 139 |
| 4 | 1004 | 2016-02-16 | 214.02 | paid | 866 | 102 |
| 5 | 1005 | 2016-07-25 | 7.38 | paid | 798 | 118 |
| 6 | 1006 | 2016-07-16 | 21.48 | paid | 606 | 133 |
| 7 | 1007 | 2015-06-30 | 157.13 | paid | 265 | 111 |
| 8 | 1008 | 2016-06-19 | 60.86 | paid | 831 | 146 |
| 9 | 1009 | 2016-07-01 | 276.78 | cancelled | 789 | 108 |
In [6]:
# Example: read data from the Customers table
df = pd.read_sql('SELECT * FROM customers;', engine)
# Display the first few rows
print(df.head())
customerid firstname lastname city state zipcode 0 100 Carol Shaw Seattle WA 98121 1 101 Elizabeth Carr Austin TX 78732 2 102 Ernest Ramos Kansas City MO 64199 3 103 Jane Carter Irving TX 75037 4 104 Martha Cooper Tampa FL 33625
In [7]:
df = pd.read_sql_query('Select * from V_orders', engine)
df.head()
Out[7]:
| orderdate | salesperson | amount_of_orders | total_due | |
|---|---|---|---|---|
| 0 | 2015-12-08 | Edward Kelley | 1 | 115.41 |
| 1 | 2016-06-20 | Marie Hall | 1 | 143.91 |
| 2 | 2016-03-12 | Victor Moore | 1 | 331.88 |
| 3 | 2015-10-29 | Joan Ruiz | 1 | 259.54 |
| 4 | 2016-05-01 | Sara Simpson | 1 | 250.92 |
In [8]:
import plotly.express as px
#Bar chart of sales by year
fig = px.bar(df, x='orderdate', y='total_due', title='Total Sales by Year')
fig.show()
In [ ]:
# SQL CustomerandProducts database connection
In [9]:
df['orderdate'] = pd.to_datetime(df['orderdate'])
#Create a 'year' column (as integer)
df['year'] = df['orderdate'].dt.year.astype(int)
#Group data by year and sum total sales
sales_by_year = df.groupby('year', as_index=False)['total_due'].sum()
# Create bar chart
fig = px.bar(
sales_by_year,
x='year',
y='total_due',
title='Total Sales by Year',
labels={'year': 'Year', 'total_due': 'Total Sales ($)'},
text_auto=True # shows values on top of bars
)
fig.show()
In [35]:
!pip install dash
Collecting dash Downloading dash-3.2.0-py3-none-any.whl.metadata (10 kB) Requirement already satisfied: Flask<3.2,>=1.0.4 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (3.1.0) Requirement already satisfied: Werkzeug<3.2 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (3.1.3) Requirement already satisfied: plotly>=5.0.0 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (5.24.1) Requirement already satisfied: importlib-metadata in /opt/anaconda3/lib/python3.13/site-packages (from dash) (8.5.0) Requirement already satisfied: typing-extensions>=4.1.1 in /opt/anaconda3/lib/python3.13/site-packages (from dash) (4.12.2) Requirement already satisfied: requests in /opt/anaconda3/lib/python3.13/site-packages (from dash) (2.32.3) Collecting retrying (from dash) Downloading retrying-1.4.2-py3-none-any.whl.metadata (5.5 kB) Requirement already satisfied: nest-asyncio in /opt/anaconda3/lib/python3.13/site-packages (from dash) (1.6.0) Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.13/site-packages (from dash) (72.1.0) Requirement already satisfied: Jinja2>=3.1.2 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (3.1.6) Requirement already satisfied: itsdangerous>=2.2 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (2.2.0) Requirement already satisfied: click>=8.1.3 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (8.1.8) Requirement already satisfied: blinker>=1.9 in /opt/anaconda3/lib/python3.13/site-packages (from Flask<3.2,>=1.0.4->dash) (1.9.0) Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/anaconda3/lib/python3.13/site-packages (from Werkzeug<3.2->dash) (3.0.2) Requirement already satisfied: tenacity>=6.2.0 in /opt/anaconda3/lib/python3.13/site-packages (from plotly>=5.0.0->dash) (9.0.0) Requirement already satisfied: packaging in /opt/anaconda3/lib/python3.13/site-packages (from plotly>=5.0.0->dash) (24.2) Requirement already satisfied: zipp>=3.20 in /opt/anaconda3/lib/python3.13/site-packages (from importlib-metadata->dash) (3.21.0) Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (2.3.0) Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/lib/python3.13/site-packages (from requests->dash) (2025.10.5) Downloading dash-3.2.0-py3-none-any.whl (7.9 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.9/7.9 MB 49.0 MB/s eta 0:00:00 Downloading retrying-1.4.2-py3-none-any.whl (10 kB) Installing collected packages: retrying, dash ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2/2 [dash]━━━━━━ 1/2 [dash] Successfully installed dash-3.2.0 retrying-1.4.2
In [10]:
#Web-based Dashboard using Dash
import dash
from dash import dcc
from dash import html
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
fig = go.Figure()
for index, row in df.iterrows():
df_salesperson = df[df['salesperson']== row.salesperson]
fig.add_trace(
go.Scatter(x=list(df_salesperson.orderdate),
y=list(df_salesperson['total_due']),
name=row.salesperson,
text=list(df['total_due'].round(1)),
textposition="top left"))
salesperson_list = []
visibility = [False for i in range(len(df['salesperson']))]
for index, row in df.iterrows():
visibility = [False for i in range(len(df['salesperson']))]
visibility[index] = True
d = dict(label=row.salesperson,
method="update",
args=[{"visible": visibility},
{"title": "Order Summary",
"annotations": []}])
salesperson_list.append(d)
fig.update_layout(
updatemenus=[
dict(
active=0,
buttons=salesperson_list,
)
])
app = dash.Dash()
app.layout = html.Div([
dcc.Graph(figure=fig)
])
app.run(debug=True, use_reloader=False)
Total Sales by State¶
In [11]:
# 2️⃣ SQL JOIN: Combine Orders with Customers on customer_id
query = """
SELECT
c.state,
o.totaldue
FROM orders o
JOIN customers c
ON o.customerid = c.customerid;
"""
df = pd.read_sql(query, engine)
# Group by state and sum total sales
sales_by_state = df.groupby('state', as_index=False)['totaldue'].sum()
# bar chart
fig = px.bar(
sales_by_state,
x='state',
y='totaldue',
title='💰 Total Sales by State',
labels={'state': 'State', 'totaldue': 'Total Sales ($)'},
text_auto=True,
color='totaldue'
)
fig.show()
In [ ]:
Total sales by State (color shaded map)¶
In [12]:
# SQL JOIN: Orders + Customers to get each sale’s state
query = """
SELECT
c.state,
o.totaldue
FROM orders o
JOIN customers c
ON o.customerid = c.customerid;
"""
# Load joined data into DataFrame
df = pd.read_sql(query, engine)
# Group by state and sum total sales
sales_by_state = df.groupby('state', as_index=False)['totaldue'].sum()
# Ensure state abbreviations are uppercase (e.g., 'CA', 'TX')
sales_by_state['state'] = sales_by_state['state'].str.upper()
# Create U.S. Choropleth Map
fig = px.choropleth(
sales_by_state,
locations='state', # Column with state abbreviations
locationmode='USA-states', # Use two-letter state codes
color='totaldue', # Value to color by
scope='usa', # Show only the U.S.
color_continuous_scale='Blues',
title='💰 Total Sales by U.S. State'
)
# Show the map
fig.show()
In [13]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd
from sqlalchemy import create_engine
# 1️⃣ Connect to your PostgreSQL database
engine = create_engine("postgresql+psycopg2://postgres:YOUR_PASSWORD@localhost:5432/CustomerandProducts")
# 2️⃣ Load data (Orders + Customers)
query = """
SELECT
o.creationdate,
o.totaldue,
o.orderid,
c.state
FROM orders o
JOIN customers c
ON o.customerid = c.customerid;
"""
df = pd.read_sql(query, engine)
# 3️⃣ Prepare and clean data
df['creationdate'] = pd.to_datetime(df['creationdate'])
df['year'] = df['creationdate'].dt.year
df['state'] = df['state'].str.upper()
# 4️⃣ Initialize Dash app
app = dash.Dash(__name__)
app.title = "Sales by State Dashboard"
# 5️⃣ Layout with dropdowns and graph
app.layout = html.Div([
html.H1("💰 Total Sales by U.S. State", style={'textAlign': 'center'}),
html.Div([
html.Label("Select Year:"),
dcc.Dropdown(
id='year_dropdown',
options=[{'label': str(y), 'value': y} for y in sorted(df['year'].unique())],
value=df['year'].max(),
clearable=False
)
], style={'width': '30%', 'margin': 'auto'}),
dcc.Graph(id='sales_map')
])
# 6️⃣ Callback to update map
@app.callback(
Output('sales_map', 'figure'),
Input('year_dropdown', 'value')
)
def update_map(selected_year):
# Filter data by year
filtered_df = df[df['year'] == selected_year]
# Group by state
sales_by_state = filtered_df.groupby('state', as_index=False)['totaldue'].sum()
# Create map
fig = px.choropleth(
sales_by_state,
locations='state',
locationmode='USA-states',
color='totaldue',
scope='usa',
color_continuous_scale='Blues',
title=f'Total Sales by State - {selected_year}'
)
fig.update_layout(
geo=dict(bgcolor='rgba(0,0,0,0)'),
margin={"r":0,"t":50,"l":0,"b":0}
)
return fig
# 7️⃣ Run the app
if __name__ == "__main__":
app.run(debug=True)
In [14]:
df = pd.read_sql_query("""
Select Customers.state, Sum(Orders.totaldue) as total_sales from customers
join Orders on Orders.customerid = Customers.customerid
where Extract(year from CreationDate) = 2016
group by customers.state
""", engine
)
df.head()
Out[14]:
| state | total_sales | |
|---|---|---|
| 0 | AL | 454.07 |
| 1 | AZ | 292.89 |
| 2 | CA | 2359.89 |
| 3 | CO | 845.82 |
| 4 | CT | 107.40 |
In [ ]: